{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DAY10 groupby"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_rows',10)\n",
    "pd.set_option('display.max_columns',10)\n",
    "pd.set_option('display.precision',2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://raw.githubusercontent.com/datoujinggzj/DataScienceCrashCourse/master/data/Iris.csv'\n",
    "df = pd.read_csv(url)\n",
    "feature_cols = df.columns[1:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面给出了一系列的比较实用的一些aggregation function。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![baOlU1.png](https://s4.ax1x.com/2022/03/04/baOlU1.png)](https://imgtu.com/i/baOlU1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 基础内容\n",
    "\n",
    "- groupby对象性质：https://pandas.pydata.org/docs/reference/groupby.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000022BE5D3F550>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Iris-setosa': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], 'Iris-versicolor': [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], 'Iris-virginica': [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Iris-setosa': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
       "        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],\n",
       "       dtype=int64),\n",
       " 'Iris-versicolor': array([50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,\n",
       "        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,\n",
       "        84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],\n",
       "       dtype=int64),\n",
       " 'Iris-virginica': array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,\n",
       "        113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,\n",
       "        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,\n",
       "        139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149], dtype=int64)}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>4.8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>5.3</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>50 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species\n",
       "0    1            5.1           3.5            1.4           0.2  Iris-setosa\n",
       "1    2            4.9           3.0            1.4           0.2  Iris-setosa\n",
       "2    3            4.7           3.2            1.3           0.2  Iris-setosa\n",
       "3    4            4.6           3.1            1.5           0.2  Iris-setosa\n",
       "4    5            5.0           3.6            1.4           0.2  Iris-setosa\n",
       "..  ..            ...           ...            ...           ...          ...\n",
       "45  46            4.8           3.0            1.4           0.3  Iris-setosa\n",
       "46  47            5.1           3.8            1.6           0.2  Iris-setosa\n",
       "47  48            4.6           3.2            1.4           0.2  Iris-setosa\n",
       "48  49            5.3           3.7            1.5           0.2  Iris-setosa\n",
       "49  50            5.0           3.3            1.4           0.2  Iris-setosa\n",
       "\n",
       "[50 rows x 6 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').get_group('Iris-setosa')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  \\\n",
       "Species                                                             \n",
       "Iris-setosa      True           True          True           True   \n",
       "Iris-versicolor  True           True          True           True   \n",
       "Iris-virginica   True           True          True           True   \n",
       "\n",
       "                 PetalWidthCm  \n",
       "Species                        \n",
       "Iris-setosa              True  \n",
       "Iris-versicolor          True  \n",
       "Iris-virginica           True  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').ngroups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Species\n",
       "Iris-setosa        50\n",
       "Iris-versicolor    50\n",
       "Iris-virginica     50\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>51</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>1.4</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>52</td>\n",
       "      <td>6.4</td>\n",
       "      <td>3.2</td>\n",
       "      <td>4.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>53</td>\n",
       "      <td>6.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>4.9</td>\n",
       "      <td>1.5</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>101</td>\n",
       "      <td>6.3</td>\n",
       "      <td>3.3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>102</td>\n",
       "      <td>5.8</td>\n",
       "      <td>2.7</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>103</td>\n",
       "      <td>7.1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.9</td>\n",
       "      <td>2.1</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \\\n",
       "0      1            5.1           3.5            1.4           0.2   \n",
       "1      2            4.9           3.0            1.4           0.2   \n",
       "2      3            4.7           3.2            1.3           0.2   \n",
       "50    51            7.0           3.2            4.7           1.4   \n",
       "51    52            6.4           3.2            4.5           1.5   \n",
       "52    53            6.9           3.1            4.9           1.5   \n",
       "100  101            6.3           3.3            6.0           2.5   \n",
       "101  102            5.8           2.7            5.1           1.9   \n",
       "102  103            7.1           3.0            5.9           2.1   \n",
       "\n",
       "             Species  \n",
       "0        Iris-setosa  \n",
       "1        Iris-setosa  \n",
       "2        Iris-setosa  \n",
       "50   Iris-versicolor  \n",
       "51   Iris-versicolor  \n",
       "52   Iris-versicolor  \n",
       "100   Iris-virginica  \n",
       "101   Iris-virginica  \n",
       "102   Iris-virginica  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>5.3</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>98</td>\n",
       "      <td>6.2</td>\n",
       "      <td>2.9</td>\n",
       "      <td>4.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>99</td>\n",
       "      <td>5.1</td>\n",
       "      <td>2.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>100</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.1</td>\n",
       "      <td>1.3</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>148</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>149</td>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>150</td>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \\\n",
       "47    48            4.6           3.2            1.4           0.2   \n",
       "48    49            5.3           3.7            1.5           0.2   \n",
       "49    50            5.0           3.3            1.4           0.2   \n",
       "97    98            6.2           2.9            4.3           1.3   \n",
       "98    99            5.1           2.5            3.0           1.1   \n",
       "99   100            5.7           2.8            4.1           1.3   \n",
       "147  148            6.5           3.0            5.2           2.0   \n",
       "148  149            6.2           3.4            5.4           2.3   \n",
       "149  150            5.9           3.0            5.1           1.8   \n",
       "\n",
       "             Species  \n",
       "47       Iris-setosa  \n",
       "48       Iris-setosa  \n",
       "49       Iris-setosa  \n",
       "97   Iris-versicolor  \n",
       "98   Iris-versicolor  \n",
       "99   Iris-versicolor  \n",
       "147   Iris-virginica  \n",
       "148   Iris-virginica  \n",
       "149   Iris-virginica  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>51</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>101</td>\n",
       "      <td>6.3</td>\n",
       "      <td>3.3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
       "Species                                                                       \n",
       "Iris-setosa        1            5.1           3.5            1.4           0.2\n",
       "Iris-versicolor   51            7.0           3.2            4.7           1.4\n",
       "Iris-virginica   101            6.3           3.3            6.0           2.5"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>100</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.1</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>150</td>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
       "Species                                                                       \n",
       "Iris-setosa       50            5.0           3.3            1.4           0.2\n",
       "Iris-versicolor  100            5.7           2.8            4.1           1.3\n",
       "Iris-virginica   150            5.9           3.0            5.1           1.8"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').last()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>10</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>60</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.7</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>110</td>\n",
       "      <td>7.2</td>\n",
       "      <td>3.6</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
       "Species                                                                       \n",
       "Iris-setosa       10            4.9           3.1            1.5           0.1\n",
       "Iris-versicolor   60            5.2           2.7            3.9           1.4\n",
       "Iris-virginica   110            7.2           3.6            6.1           2.5"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').nth(9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>5.2</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.3</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>68</td>\n",
       "      <td>5.8</td>\n",
       "      <td>2.7</td>\n",
       "      <td>4.1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>79</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.9</td>\n",
       "      <td>4.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>94</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.3</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Iris-versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>116</td>\n",
       "      <td>6.4</td>\n",
       "      <td>3.2</td>\n",
       "      <td>5.3</td>\n",
       "      <td>2.3</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>133</td>\n",
       "      <td>6.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>143</td>\n",
       "      <td>5.8</td>\n",
       "      <td>2.7</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>Iris-virginica</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \\\n",
       "27    28            5.2           3.5            1.5           0.2   \n",
       "35    36            5.0           3.2            1.2           0.2   \n",
       "40    41            5.0           3.5            1.3           0.3   \n",
       "67    68            5.8           2.7            4.1           1.0   \n",
       "78    79            6.0           2.9            4.5           1.5   \n",
       "93    94            5.0           2.3            3.3           1.0   \n",
       "115  116            6.4           3.2            5.3           2.3   \n",
       "132  133            6.4           2.8            5.6           2.2   \n",
       "142  143            5.8           2.7            5.1           1.9   \n",
       "\n",
       "             Species  \n",
       "27       Iris-setosa  \n",
       "35       Iris-setosa  \n",
       "40       Iris-setosa  \n",
       "67   Iris-versicolor  \n",
       "78   Iris-versicolor  \n",
       "93   Iris-versicolor  \n",
       "115   Iris-virginica  \n",
       "132   Iris-virginica  \n",
       "142   Iris-virginica  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species').sample(n=3,random_state = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>...</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th colspan=\"4\" halign=\"left\">PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>open</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>close</th>\n",
       "      <th>open</th>\n",
       "      <th>...</th>\n",
       "      <th>close</th>\n",
       "      <th>open</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>5.1</td>\n",
       "      <td>5.8</td>\n",
       "      <td>4.3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>...</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>5.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>...</td>\n",
       "      <td>4.1</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>6.3</td>\n",
       "      <td>7.9</td>\n",
       "      <td>4.9</td>\n",
       "      <td>5.9</td>\n",
       "      <td>3.3</td>\n",
       "      <td>...</td>\n",
       "      <td>5.1</td>\n",
       "      <td>2.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                SepalLengthCm                 SepalWidthCm  ... PetalLengthCm  \\\n",
       "                         open high  low close         open  ...         close   \n",
       "Species                                                     ...                 \n",
       "Iris-setosa               5.1  5.8  4.3   5.0          3.5  ...           1.4   \n",
       "Iris-versicolor           7.0  7.0  4.9   5.7          3.2  ...           4.1   \n",
       "Iris-virginica            6.3  7.9  4.9   5.9          3.3  ...           5.1   \n",
       "\n",
       "                PetalWidthCm                  \n",
       "                        open high  low close  \n",
       "Species                                       \n",
       "Iris-setosa              0.2  0.6  0.1   0.2  \n",
       "Iris-versicolor          1.4  1.8  1.0   1.3  \n",
       "Iris-virginica           2.5  2.5  1.4   1.8  \n",
       "\n",
       "[3 rows x 16 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].ohlc() # 用于股票数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 描述性统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>5.8</td>\n",
       "      <td>4.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>7.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
       "Species                                                                  \n",
       "Iris-setosa                5.8           4.4            1.9           0.6\n",
       "Iris-versicolor            7.0           3.4            5.1           1.8\n",
       "Iris-virginica             7.9           3.8            6.9           2.5"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算每个类别的四个特征的最大值。\n",
    "df.groupby('Species')[feature_cols].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Species</th>\n",
       "      <th>Iris-setosa</th>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <th>Iris-virginica</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">SepalLengthCm</th>\n",
       "      <th>count</th>\n",
       "      <td>50.00</td>\n",
       "      <td>50.00</td>\n",
       "      <td>50.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>5.01</td>\n",
       "      <td>5.94</td>\n",
       "      <td>6.59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.35</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>4.30</td>\n",
       "      <td>4.90</td>\n",
       "      <td>4.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>4.80</td>\n",
       "      <td>5.60</td>\n",
       "      <td>6.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">PetalWidthCm</th>\n",
       "      <th>min</th>\n",
       "      <td>0.10</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.20</td>\n",
       "      <td>1.20</td>\n",
       "      <td>1.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.20</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.30</td>\n",
       "      <td>1.50</td>\n",
       "      <td>2.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>0.60</td>\n",
       "      <td>1.80</td>\n",
       "      <td>2.50</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>32 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Species              Iris-setosa  Iris-versicolor  Iris-virginica\n",
       "SepalLengthCm count        50.00            50.00           50.00\n",
       "              mean          5.01             5.94            6.59\n",
       "              std           0.35             0.52            0.64\n",
       "              min           4.30             4.90            4.90\n",
       "              25%           4.80             5.60            6.22\n",
       "...                          ...              ...             ...\n",
       "PetalWidthCm  min           0.10             1.00            1.40\n",
       "              25%           0.20             1.20            1.80\n",
       "              50%           0.20             1.30            2.00\n",
       "              75%           0.30             1.50            2.30\n",
       "              max           0.60             1.80            2.50\n",
       "\n",
       "[32 rows x 3 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算每个类别的四个特征的描述性统计指标汇总\n",
    "df.groupby('Species')[feature_cols].describe().T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\"><b>Step 1</b>: 对各组进行描述性统计：第三四分位数（Q3）和第一四分位数（Q1）</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Iris-setosa</th>\n",
       "      <th>0.25</th>\n",
       "      <td>4.80</td>\n",
       "      <td>3.12</td>\n",
       "      <td>1.40</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.50</th>\n",
       "      <td>5.00</td>\n",
       "      <td>3.40</td>\n",
       "      <td>1.50</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.75</th>\n",
       "      <td>5.20</td>\n",
       "      <td>3.68</td>\n",
       "      <td>1.58</td>\n",
       "      <td>0.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Iris-versicolor</th>\n",
       "      <th>0.25</th>\n",
       "      <td>5.60</td>\n",
       "      <td>2.52</td>\n",
       "      <td>4.00</td>\n",
       "      <td>1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.50</th>\n",
       "      <td>5.90</td>\n",
       "      <td>2.80</td>\n",
       "      <td>4.35</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.75</th>\n",
       "      <td>6.30</td>\n",
       "      <td>3.00</td>\n",
       "      <td>4.60</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Iris-virginica</th>\n",
       "      <th>0.25</th>\n",
       "      <td>6.22</td>\n",
       "      <td>2.80</td>\n",
       "      <td>5.10</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.50</th>\n",
       "      <td>6.50</td>\n",
       "      <td>3.00</td>\n",
       "      <td>5.55</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.75</th>\n",
       "      <td>6.90</td>\n",
       "      <td>3.18</td>\n",
       "      <td>5.88</td>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
       "Species                                                                       \n",
       "Iris-setosa     0.25           4.80          3.12           1.40           0.2\n",
       "                0.50           5.00          3.40           1.50           0.2\n",
       "                0.75           5.20          3.68           1.58           0.3\n",
       "Iris-versicolor 0.25           5.60          2.52           4.00           1.2\n",
       "                0.50           5.90          2.80           4.35           1.3\n",
       "                0.75           6.30          3.00           4.60           1.5\n",
       "Iris-virginica  0.25           6.22          2.80           5.10           1.8\n",
       "                0.50           6.50          3.00           5.55           2.0\n",
       "                0.75           6.90          3.18           5.88           2.3"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].quantile([.25,.5,.75])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "那么如何同时计算多个统计指标呢？使用`agg()`函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Species</th>\n",
       "      <th>Iris-setosa</th>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <th>Iris-virginica</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">SepalLengthCm</th>\n",
       "      <th>mean</th>\n",
       "      <td>5.01</td>\n",
       "      <td>5.94</td>\n",
       "      <td>6.59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>var</th>\n",
       "      <td>0.12</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.35</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>median</th>\n",
       "      <td>5.00</td>\n",
       "      <td>5.90</td>\n",
       "      <td>6.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>4.30</td>\n",
       "      <td>4.90</td>\n",
       "      <td>4.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">PetalWidthCm</th>\n",
       "      <th>var</th>\n",
       "      <td>0.01</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.11</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>median</th>\n",
       "      <td>0.20</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.10</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>0.60</td>\n",
       "      <td>1.80</td>\n",
       "      <td>2.50</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Species               Iris-setosa  Iris-versicolor  Iris-virginica\n",
       "SepalLengthCm mean           5.01             5.94            6.59\n",
       "              var            0.12             0.27            0.40\n",
       "              std            0.35             0.52            0.64\n",
       "              median         5.00             5.90            6.50\n",
       "              min            4.30             4.90            4.90\n",
       "...                           ...              ...             ...\n",
       "PetalWidthCm  var            0.01             0.04            0.08\n",
       "              std            0.11             0.20            0.27\n",
       "              median         0.20             1.30            2.00\n",
       "              min            0.10             1.00            1.40\n",
       "              max            0.60             1.80            2.50\n",
       "\n",
       "[24 rows x 3 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "stats_list = [np.mean, np.var,'std','median','min','max'] # str function\n",
    "\n",
    "df.groupby('Species')[feature_cols].agg(stats_list).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\"><b>Step 2</b>：\n",
    "那么如何对不同的列求不同的统计量？ \n",
    "请对:\n",
    "    \n",
    "- SepalLengthCm计算mean\n",
    "- SepalWidthCm计算var\n",
    "- PetalLengthCm计算max\n",
    "- PetalWidthCm计算min\n",
    "    \n",
    "并使用rename函数对index和column进行重命名。\n",
    "</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "agg_mapping = {\n",
    "    'SepalLengthCm':'mean',\n",
    "    'SepalWidthCm' :'var',\n",
    "    'PetalLengthCm':'max',\n",
    "    'PetalWidthCm':'min'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthC的均值</th>\n",
       "      <th>SepalWidthCm的方差</th>\n",
       "      <th>PetalLengthCm的最大值</th>\n",
       "      <th>PetalWidthCm的最小值</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>5.01</td>\n",
       "      <td>0.15</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>5.94</td>\n",
       "      <td>0.10</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>6.59</td>\n",
       "      <td>0.10</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthC的均值  SepalWidthCm的方差  PetalLengthCm的最大值  \\\n",
       "Species                                                                \n",
       "Iris-setosa                 5.01             0.15                1.9   \n",
       "Iris-versicolor             5.94             0.10                5.1   \n",
       "Iris-virginica              6.59             0.10                6.9   \n",
       "\n",
       "                 PetalWidthCm的最小值  \n",
       "Species                            \n",
       "Iris-setosa                   0.1  \n",
       "Iris-versicolor               1.0  \n",
       "Iris-virginica                1.4  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].agg(agg_mapping).rename(columns = {'SepalLengthCm':'SepalLengthC的均值',\n",
    "                                                         'SepalWidthCm':'SepalWidthCm的方差',\n",
    "                                                         'PetalLengthCm':'PetalLengthCm的最大值',\n",
    "                                                         'PetalWidthCm':'PetalWidthCm的最小值'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthC的均值</th>\n",
       "      <th>SepalWidthCm的方差</th>\n",
       "      <th>PetalLengthCm的最大值</th>\n",
       "      <th>PetalWidthCm的最小值</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>setosa</th>\n",
       "      <td>5.01</td>\n",
       "      <td>0.15</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>versocolor</th>\n",
       "      <td>5.94</td>\n",
       "      <td>0.10</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>virginica</th>\n",
       "      <td>6.59</td>\n",
       "      <td>0.10</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            SepalLengthC的均值  SepalWidthCm的方差  PetalLengthCm的最大值  \\\n",
       "Species                                                           \n",
       "setosa                 5.01             0.15                1.9   \n",
       "versocolor             5.94             0.10                5.1   \n",
       "virginica              6.59             0.10                6.9   \n",
       "\n",
       "            PetalWidthCm的最小值  \n",
       "Species                       \n",
       "setosa                   0.1  \n",
       "versocolor               1.0  \n",
       "virginica                1.4  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].agg(agg_mapping).rename(columns = {'SepalLengthCm':'SepalLengthC的均值',\n",
    "                                                         'SepalWidthCm':'SepalWidthCm的方差',\n",
    "                                                         'PetalLengthCm':'PetalLengthCm的最大值',\n",
    "                                                         'PetalWidthCm':'PetalWidthCm的最小值'},\n",
    "                                                        index = {\n",
    "                                                            'Iris-setosa':'setosa',\n",
    "                                                            'Iris-versicolor':'versocolor',\n",
    "                                                            'Iris-virginica':'virginica'\n",
    "                                                        })\n",
    "\n",
    "# 答案应该和下面完全一样！"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthC的均值</th>\n",
       "      <th>SepalWidthCm的方差</th>\n",
       "      <th>PetalLengthCm的最大值</th>\n",
       "      <th>PetalWidthCm的最小值</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>5.01</td>\n",
       "      <td>0.15</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>5.94</td>\n",
       "      <td>0.10</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>6.59</td>\n",
       "      <td>0.10</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthC的均值  SepalWidthCm的方差  PetalLengthCm的最大值  \\\n",
       "Species                                                                \n",
       "Iris-setosa                 5.01             0.15                1.9   \n",
       "Iris-versicolor             5.94             0.10                5.1   \n",
       "Iris-virginica              6.59             0.10                6.9   \n",
       "\n",
       "                 PetalWidthCm的最小值  \n",
       "Species                            \n",
       "Iris-setosa                   0.1  \n",
       "Iris-versicolor               1.0  \n",
       "Iris-virginica                1.4  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].agg(\n",
    "    SepalLengthC的均值 = ('SepalLengthCm','mean'),\n",
    "    SepalWidthCm的方差 = ('SepalWidthCm',lambda x: x.var()),\n",
    "    PetalLengthCm的最大值 = ('PetalLengthCm','max'),\n",
    "    PetalWidthCm的最小值 = ('PetalWidthCm','min')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthC的均值</th>\n",
       "      <th>SepalWidthCm的方差</th>\n",
       "      <th>PetalLengthCm的最大值</th>\n",
       "      <th>PetalWidthCm的最小值</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>setosa</th>\n",
       "      <td>5.01</td>\n",
       "      <td>0.15</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>versocolor</th>\n",
       "      <td>5.94</td>\n",
       "      <td>0.10</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>virginica</th>\n",
       "      <td>6.59</td>\n",
       "      <td>0.10</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            SepalLengthC的均值  SepalWidthCm的方差  PetalLengthCm的最大值  \\\n",
       "Species                                                           \n",
       "setosa                 5.01             0.15                1.9   \n",
       "versocolor             5.94             0.10                5.1   \n",
       "virginica              6.59             0.10                6.9   \n",
       "\n",
       "            PetalWidthCm的最小值  \n",
       "Species                       \n",
       "setosa                   0.1  \n",
       "versocolor               1.0  \n",
       "virginica                1.4  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 也可以这么写\n",
    "\n",
    "df.groupby('Species')[feature_cols].agg(\n",
    "    SepalLengthC的均值 = ('SepalLengthCm','mean'),\n",
    "    SepalWidthCm的方差 = ('SepalWidthCm','var'),\n",
    "    PetalLengthCm的最大值 = ('PetalLengthCm','max'),\n",
    "    PetalWidthCm的最小值 = ('PetalWidthCm','min')\n",
    ").rename(index = {\n",
    "                    'Iris-setosa':'setosa',\n",
    "                    'Iris-versicolor':'versocolor',\n",
    "                    'Iris-virginica':'virginica'\n",
    "                })"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\"><b>Step 3</b>：\n",
    "那么如何对不同的列求不同的【自定义】统计量？ \n",
    "请对:\n",
    "    \n",
    "- SepalLengthCm计算IQR\n",
    "- SepalWidthCm计算极差\n",
    "- PetalLengthCm计算几何平均值\n",
    "- PetalWidthCm均值大于1返回True，反之为False\n",
    "</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthCm_iqr</th>\n",
       "      <th>SepalWidthCm_range</th>\n",
       "      <th>PetalLengthCm_geomean</th>\n",
       "      <th>PetalWidthCm_bool</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>0.40</td>\n",
       "      <td>2.1</td>\n",
       "      <td>1.45</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>0.70</td>\n",
       "      <td>1.4</td>\n",
       "      <td>4.23</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>0.68</td>\n",
       "      <td>1.6</td>\n",
       "      <td>5.53</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthCm_iqr  SepalWidthCm_range  PetalLengthCm_geomean  \\\n",
       "Species                                                                         \n",
       "Iris-setosa                   0.40                 2.1                   1.45   \n",
       "Iris-versicolor               0.70                 1.4                   4.23   \n",
       "Iris-virginica                0.68                 1.6                   5.53   \n",
       "\n",
       "                 PetalWidthCm_bool  \n",
       "Species                             \n",
       "Iris-setosa                  False  \n",
       "Iris-versicolor               True  \n",
       "Iris-virginica                True  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用pd.NameAgg\n",
    "df.groupby('Species')[feature_cols].agg(\n",
    "    SepalLengthCm_iqr = pd.NamedAgg(column = 'SepalLengthCm',aggfunc = lambda x: x.quantile(0.75)-x.quantile(0.25)),\n",
    "    SepalWidthCm_range = pd.NamedAgg(column = 'SepalWidthCm',aggfunc = lambda x: max(x)-min(x)),\n",
    "    PetalLengthCm_geomean = pd.NamedAgg(column = 'PetalLengthCm',aggfunc = lambda x: x.prod()**(1/len(x))),\n",
    "    PetalWidthCm_bool = pd.NamedAgg(column = 'PetalWidthCm',aggfunc = lambda x: True if x.mean() > 1 else False),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthCm_iqr</th>\n",
       "      <th>SepalWidthCm_range</th>\n",
       "      <th>PetalLengthCm_geomean</th>\n",
       "      <th>PetalWidthCm_bool</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>0.40</td>\n",
       "      <td>2.1</td>\n",
       "      <td>1.45</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>0.70</td>\n",
       "      <td>1.4</td>\n",
       "      <td>4.23</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>0.68</td>\n",
       "      <td>1.6</td>\n",
       "      <td>5.53</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthCm_iqr  SepalWidthCm_range  PetalLengthCm_geomean  \\\n",
       "Species                                                                         \n",
       "Iris-setosa                   0.40                 2.1                   1.45   \n",
       "Iris-versicolor               0.70                 1.4                   4.23   \n",
       "Iris-virginica                0.68                 1.6                   5.53   \n",
       "\n",
       "                 PetalWidthCm_bool  \n",
       "Species                             \n",
       "Iris-setosa                  False  \n",
       "Iris-versicolor               True  \n",
       "Iris-virginica                True  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Species')[feature_cols].agg(\n",
    "    SepalLengthCm_iqr = ('SepalLengthCm',lambda x: x.quantile(0.75)-x.quantile(0.25)),\n",
    "    SepalWidthCm_range = ('SepalWidthCm',lambda x: max(x)-min(x)),\n",
    "    PetalLengthCm_geomean = ('PetalLengthCm',lambda x: x.prod()**(1/len(x))),\n",
    "    PetalWidthCm_bool = ('PetalWidthCm', lambda x: True if x.mean() > 1 else False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SepalLengthCm_iqr</th>\n",
       "      <th>SepalWidthCm_range</th>\n",
       "      <th>PetalLengthCm_geomean</th>\n",
       "      <th>PetalWidthCm_bool</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Species</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Iris-setosa</th>\n",
       "      <td>0.40</td>\n",
       "      <td>2.1</td>\n",
       "      <td>1.45</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-versicolor</th>\n",
       "      <td>0.70</td>\n",
       "      <td>1.4</td>\n",
       "      <td>4.23</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iris-virginica</th>\n",
       "      <td>0.68</td>\n",
       "      <td>1.6</td>\n",
       "      <td>5.53</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 SepalLengthCm_iqr  SepalWidthCm_range  PetalLengthCm_geomean  \\\n",
       "Species                                                                         \n",
       "Iris-setosa                   0.40                 2.1                   1.45   \n",
       "Iris-versicolor               0.70                 1.4                   4.23   \n",
       "Iris-virginica                0.68                 1.6                   5.53   \n",
       "\n",
       "                 PetalWidthCm_bool  \n",
       "Species                             \n",
       "Iris-setosa                  False  \n",
       "Iris-versicolor               True  \n",
       "Iris-virginica                True  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# **kwargs as tuples,\n",
    "df.groupby('Species')[feature_cols].agg(\n",
    "    SepalLengthCm_iqr = ('SepalLengthCm',lambda x: x.quantile(0.75)-x.quantile(0.25)),\n",
    "    SepalWidthCm_range = ('SepalWidthCm',lambda x: max(x)-min(x)),\n",
    "    PetalLengthCm_geomean = ('PetalLengthCm',lambda x: x.prod()**(1/len(x))),\n",
    "    PetalWidthCm_bool = ('PetalWidthCm', lambda x: True if x.mean() > 1 else False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "generator = df.groupby(['Species']).__iter__()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iris-setosa\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>SepalLengthCm</th>\n",
       "      <th>SepalWidthCm</th>\n",
       "      <th>PetalLengthCm</th>\n",
       "      <th>PetalWidthCm</th>\n",
       "      <th>Species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>4.8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>5.3</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>50 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species\n",
       "0    1            5.1           3.5            1.4           0.2  Iris-setosa\n",
       "1    2            4.9           3.0            1.4           0.2  Iris-setosa\n",
       "2    3            4.7           3.2            1.3           0.2  Iris-setosa\n",
       "3    4            4.6           3.1            1.5           0.2  Iris-setosa\n",
       "4    5            5.0           3.6            1.4           0.2  Iris-setosa\n",
       "..  ..            ...           ...            ...           ...          ...\n",
       "45  46            4.8           3.0            1.4           0.3  Iris-setosa\n",
       "46  47            5.1           3.8            1.6           0.2  Iris-setosa\n",
       "47  48            4.6           3.2            1.4           0.2  Iris-setosa\n",
       "48  49            5.3           3.7            1.5           0.2  Iris-setosa\n",
       "49  50            5.0           3.3            1.4           0.2  Iris-setosa\n",
       "\n",
       "[50 rows x 6 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ctrl+enter\n",
    "group_name, grouped_data = generator.__next__()\n",
    "print(group_name) \n",
    "grouped_data"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
